Three sources of data:

Rui Chen’s Human Retina Cell Atlas and fetal Human Retina Cell Atlas

And GTEx’s prototype single cell atlas.

Why not David’s plae resource?

Well, I’d rather collect another citation, but NR6A1 isn’t well detected in single cell RNA-seq (the reason why is not shown here). All of the resources used here are single nucleus RNA-seq, which captures the RNA from the nucleus of the cell.

tldr

Evidence of higher NR6A1 (in both fetal and adult retina datasets) in the horizontal cells (HC).

In the non-retina GTEx dataset, NR6A1 is a bit higher in epithelial cells. No evidence of NR6A1 in (adult) RPE, though.

Dataset

snRNA h5ad retrieved from here and here on 2024-05-15.

Downloaded adult and fetal

Downloaded the GTEx v8 data on 2024-05-16.

Methods

The single nucleus data was aggregated (with the ADPBulk python package) to the cell type and sample level by summing the counts. This is also known as a “pseudobulk” process as it turns single cells into a data type with the rough properties of a bulk RNA-seq dataset.

“Pseudobulk” samples with lower overall counts (cutoffs derived by hand-inspecting the distributions of overall expression) were removed.

The data was CPM scaled and log1p transformed for plotting.

# Activate the mamba environment named scvi1.0.4 before running the Python steps.
mamba activate scvi1.0.4
# Each call below passes three arguments to src/adata_to_pseudobulk.py:
# an .h5ad path, a comma-separated list of obs column names, and a .csv.gz path
# (presumably input, grouping columns, and output -- confirm against the script).
# Fetal HRCA: donor_id, development_stage, majorclass.
python src/adata_to_pseudobulk.py ~/data/chen_rca/chen_fetal_hrca_snRNA_88444d73-7f55-4a62-bcfe-e929878c6c78.h5ad donor_id,development_stage,majorclass data/chen_fetal_hrca.pb_raw.csv.gz

# Adult HRCA: donor_id, majorclass.
python src/adata_to_pseudobulk.py ~/data/chen_rca/chen_adult_hrca_snRNA_89f6a640-0537-4fd1-bdf9-540db9dd0b7d.h5ad donor_id,majorclass data/chen_adult_hrca.pb_raw.csv.gz

# GTEx: the column names contain spaces, which are backslash-escaped so the
# list stays a single shell argument. NOTE(review): this input is an absolute,
# user-specific path, unlike the ~/ paths used above.
python src/adata_to_pseudobulk.py /Users/mcgaugheyd/data/gtex/GTEx_8_tissues_snRNAseq_atlas_071421.public_obs.h5ad  Sample\ ID,Tissue,Broad\ cell\ type data/gtex_v8_pb.csv.gz
library(tidyverse)
# Adult HRCA pseudobulk table: the first column holds the pseudobulk sample
# labels (used as row names below); the remaining columns are the per-gene
# values.
pb <- data.table::fread("../data/chen_adult_hrca.pb_raw.csv.gz")
pb_mat <- pb[, 2:ncol(pb)] %>% as.matrix()
row.names(pb_mat) <- pb %>% pull(1)
# Keep only pseudobulk samples whose log1p(total counts) exceeds 12.
# drop = FALSE keeps the result a matrix even if a single sample passes;
# without it the subset collapses to a vector and t() below misbehaves.
pb_mat <- pb_mat[pb_mat %>% rowSums() %>% log1p() > 12, , drop = FALSE]

# normalize_data() receives the transposed matrix and its result is transposed
# back, so samples are rows again in pb_transform. Per the Methods text this
# step is the CPM scaling + log1p transform.
pb_transform <- t(metamoRph::normalize_data(t(pb_mat), log1p = TRUE))

Pull gene names

# Look up gene symbol, cytoband, gene name and Entrez ID for every Ensembl
# gene ID in the pseudobulk table.
# Only the ENSG* column names are used as keys: colnames(pb) also contains the
# non-gene first column (the sample labels), which is not an Ensembl ID and
# previously yielded an all-NA annotation row.
conv_table <- AnnotationDbi::select(
  org.Hs.eg.db::org.Hs.eg.db,
  keys = grep("^ENSG", colnames(pb), value = TRUE),
  columns = c("ENSEMBL", "SYMBOL", "MAP", "GENENAME", "ENTREZID"),
  keytype = "ENSEMBL"
)
conv_table %>% head()
#>           ENSEMBL SYMBOL      MAP
#> 1              V1   <NA>     <NA>
#> 2 ENSG00000000003 TSPAN6   Xq22.1
#> 3 ENSG00000000005   TNMD   Xq22.1
#> 4 ENSG00000000419   DPM1 20q13.13
#> 5 ENSG00000000457  SCYL3   1q24.2
#> 6 ENSG00000000460  FIRRM   1q24.2
#>                                                      GENENAME ENTREZID
#> 1                                                        <NA>     <NA>
#> 2                                               tetraspanin 6     7105
#> 3                                                 tenomodulin    64102
#> 4 dolichyl-phosphate mannosyltransferase subunit 1, catalytic     8813
#> 5                                    SCY1 like pseudokinase 3    57147
#> 6   FIGNL1 interacting regulator of recombination and mitosis    55732
# Long-format version of the normalized adult matrix: one row per
# (pseudobulk sample, gene). The sample label is split at "-majorclass." into
# a donor part and the cell class, and the "donor_id." prefix is stripped from
# the donor part. Gene columns (ENSG*) end up in `name` / `value`.
pb_long <- pb_transform %>%
  as_tibble(rownames = 'info') %>%
  separate(
    col = info,
    into = c("donor", "class"),
    sep = '-majorclass\\.'
  ) %>%
  mutate(donor = gsub("donor_id\\.", "", donor)) %>%
  pivot_longer(cols = starts_with("ENSG"))

# Adult HRCA: NR6A1 pseudobulk expression per cell class (boxplot + points).
# The annotation table is collapsed to one row per Ensembl ID (multiple
# symbols are joined with ", ") before it is attached to the long table.
pb_long %>%
  left_join(
    conv_table %>%
      group_by(ENSEMBL) %>%
      summarise(SYMBOL = paste(SYMBOL, collapse = ', ')),
    by = c("name" = "ENSEMBL")
  ) %>%
  filter(SYMBOL == 'NR6A1') %>%
  ggplot(aes(x = class, y = value)) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom() +
  cowplot::theme_cowplot() +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in HRCA"
  )

fetal

# Fetal HRCA pseudobulk table: the first column holds the pseudobulk sample
# labels (used as row names below); the remaining columns are the per-gene
# values.
pb_fetal <- data.table::fread("../data/chen_fetal_hrca.pb_raw.csv.gz")
pb_mat_fetal <- pb_fetal[, 2:ncol(pb_fetal)] %>% as.matrix()
row.names(pb_mat_fetal) <- pb_fetal %>% pull(1)
# Diagnostic used to pick the cutoff (run before filtering):
# pb_mat_fetal %>% rowSums() %>% log1p() %>% density() %>% plot()
# Keep only pseudobulk samples whose log1p(total counts) exceeds 12.
# drop = FALSE keeps the result a matrix even if a single sample passes.
pb_mat_fetal <- pb_mat_fetal[
  pb_mat_fetal %>% rowSums() %>% log1p() > 12, ,
  drop = FALSE
]

# normalize_data() receives the transposed matrix and its result is transposed
# back, so samples are rows again in pb_transform_fetal.
pb_transform_fetal <- t(metamoRph::normalize_data(t(pb_mat_fetal), log1p = TRUE))


# Long-format version of the normalized fetal matrix: one row per
# (pseudobulk sample, gene). The sample label is split at "-majorclass." into
# a donor part and the cell class.
# `age` is the integer in front of the first English ordinal suffix found in
# the donor part (e.g. "9th" -> 9). All four suffixes (st/nd/rd/th) are
# accepted; matching only "th"/"st" left ages such as "22nd" or "23rd" as NA.
# NOTE(review): assumes the first such number in the label is the
# developmental week -- confirm against the actual row names.
pb_long_fetal <- pb_transform_fetal %>%
  as_tibble(rownames = 'info') %>%
  separate(info, c("donor", "class"), sep = '-majorclass\\.') %>%
  mutate(age = str_extract(donor, "\\d+(?=st|nd|rd|th)") %>% as.integer()) %>%
  pivot_longer(cols = starts_with("ENSG"))
# Fetal HRCA: NR6A1 pseudobulk expression per cell class, one panel per age.
# The annotation table is collapsed to one row per Ensembl ID (multiple
# symbols are joined with ", ") before it is attached to the long table.
pb_long_fetal %>%
  left_join(
    conv_table %>%
      group_by(ENSEMBL) %>%
      summarise(SYMBOL = paste(SYMBOL, collapse = ', ')),
    by = c("name" = "ENSEMBL")
  ) %>%
  filter(SYMBOL == 'NR6A1') %>%
  ggplot(aes(x = class, y = value)) +
  facet_wrap(~age) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom() +
  cowplot::theme_cowplot() +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in fetal HRCA (facet by age(weeks))"
  ) +
  # Rotate the cell-class labels so they do not overlap.
  scale_x_discrete(guide = guide_axis(angle = 90))

# Fetal HRCA: NR6A1 pseudobulk expression per cell class, points coloured by
# age. The annotation table is collapsed to one row per Ensembl ID (multiple
# symbols are joined with ", ") before it is attached to the long table.
pb_long_fetal %>%
  left_join(
    conv_table %>%
      group_by(ENSEMBL) %>%
      summarise(SYMBOL = paste(SYMBOL, collapse = ', ')),
    by = c("name" = "ENSEMBL")
  ) %>%
  filter(SYMBOL == 'NR6A1') %>%
  ggplot(aes(x = class, y = value)) +
  geom_boxplot() +
  # Colour is mapped on the points layer only: age is continuous and varies
  # within each box, so a global colour mapping cannot be applied to the
  # boxplots and is dropped with a warning.
  ggbeeswarm::geom_quasirandom(aes(color = age)) +
  ylab("log1p(NR6A1)") +
  cowplot::theme_cowplot() +
  ggtitle("Pseudobulk Expression of NR6A1 in fetal HRCA") +
  # Rotate the cell-class labels so they do not overlap.
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(color = 'age(weeks)') +
  scale_color_viridis_c()

# gtex also snRNA

# GTEx pseudobulk table: the first column holds the pseudobulk sample labels
# (used as row names below); the remaining columns are the per-gene values.
pb_gtex <- data.table::fread("../data/gtex_v8_pb.csv.gz")
pb_mat_gtex <- pb_gtex[, 2:ncol(pb_gtex)] %>% as.matrix()
row.names(pb_mat_gtex) <- pb_gtex %>% pull(1)
# Diagnostic used to pick the cutoff (run before filtering):
# pb_mat_gtex %>% rowSums() %>% log1p() %>% density() %>% plot()
# Keep only pseudobulk samples whose log1p(total counts) exceeds 9 (a lower
# cutoff than the 12 used for the retina tables).
# drop = FALSE keeps the result a matrix even if a single sample passes.
pb_mat_gtex <- pb_mat_gtex[
  pb_mat_gtex %>% rowSums() %>% log1p() > 9, ,
  drop = FALSE
]

# normalize_data() receives the transposed matrix and its result is transposed
# back, so samples are rows again in pb_transform_gtex.
pb_transform_gtex <- t(metamoRph::normalize_data(t(pb_mat_gtex), log1p = TRUE))


# Long-format version of the normalized GTEx matrix: one row per
# (pseudobulk sample, gene). `class` and `tissue` are parsed out of the sample
# label from its "Broad cell type.<value>" and "Tissue.<value>" fields.
# The dots after the field names are escaped so they match a literal "."
# in both patterns.
# NOTE(review): `\\w+` stops at the first non-word character, so any label
# containing spaces or punctuation is cut down to its first word -- confirm
# this collapsing of multi-word tissues / cell types is intended.
pb_long_gtex <- pb_transform_gtex %>%
  as_tibble(rownames = 'info') %>%
  mutate(
    class = str_extract(info, "Broad cell type\\.\\w+") %>%
      gsub("Broad cell type\\.", "", .),
    tissue = str_extract(info, "Tissue\\.\\w+") %>%
      gsub("Tissue\\.", "", .)
  ) %>%
  pivot_longer(cols = -c(info, class, tissue))
# GTEx: NR6A1 pseudobulk expression per cell class; points coloured by tissue.
# Genes are matched by the `name` column directly (no Ensembl lookup here).
pb_long_gtex %>%
  filter(name == 'NR6A1') %>%
  ggplot(aes(x = class, y = value)) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom(aes(color = tissue)) +
  cowplot::theme_cowplot() +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in GTEx"
  ) +
  # unname() so the palette colours are assigned by position, not by name.
  scale_color_manual(values = unname(pals::polychrome())) +
  scale_x_discrete(guide = guide_axis(angle = 90))

# GTEx: NR6A1 pseudobulk expression for every tissue x cell class combination
# on the x axis; points coloured by tissue.
pb_long_gtex %>%
  filter(name == 'NR6A1') %>%
  ggplot(aes(x = interaction(tissue, class), y = value)) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom(aes(color = tissue)) +
  cowplot::theme_cowplot() +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in GTEx"
  ) +
  # unname() so the palette colours are assigned by position, not by name.
  scale_color_manual(values = unname(pals::polychrome())) +
  scale_x_discrete(guide = guide_axis(angle = 90))

session

# Print the R version, platform details and loaded package versions for this
# run (output reproduced below).
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.0 (2023-04-21)
#>  os       macOS Ventura 13.6.6
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/New_York
#>  date     2024-05-16
#>  pandoc   3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package              * version   date (UTC) lib source
#>  abind                  1.4-5     2016-07-21 [1] CRAN (R 4.3.0)
#>  AnnotationDbi          1.61.2    2023-03-24 [1] Bioconductor
#>  beachmat               2.15.0    2022-12-20 [1] Bioconductor
#>  beeswarm               0.4.0     2021-06-01 [1] CRAN (R 4.3.0)
#>  Biobase                2.60.0    2023-04-25 [1] Bioconductor
#>  BiocGenerics           0.46.0    2023-04-25 [1] Bioconductor
#>  BiocNeighbors          1.17.1    2022-12-20 [1] Bioconductor
#>  BiocParallel           1.33.11   2023-03-24 [1] Bioconductor
#>  BiocSingular           1.15.0    2022-12-20 [1] Bioconductor
#>  Biostrings             2.67.2    2023-04-19 [1] Bioconductor
#>  bit                    4.0.5     2022-11-15 [1] CRAN (R 4.3.0)
#>  bit64                  4.0.5     2020-08-30 [1] CRAN (R 4.3.0)
#>  bitops                 1.0-7     2021-04-24 [1] CRAN (R 4.3.0)
#>  blob                   1.2.4     2023-03-17 [1] CRAN (R 4.3.0)
#>  bluster                1.9.1     2023-01-13 [1] Bioconductor
#>  bslib                  0.6.1     2023-11-28 [1] CRAN (R 4.3.1)
#>  cachem                 1.0.8     2023-05-01 [1] CRAN (R 4.3.0)
#>  callr                  3.7.3     2022-11-02 [1] CRAN (R 4.3.0)
#>  cli                    3.6.1     2023-03-23 [1] CRAN (R 4.3.0)
#>  cluster                2.1.4     2022-08-22 [1] CRAN (R 4.3.0)
#>  codetools              0.2-19    2023-02-01 [1] CRAN (R 4.3.0)
#>  colorspace             2.1-0     2023-01-23 [1] CRAN (R 4.3.0)
#>  cowplot                1.1.1     2020-12-30 [1] CRAN (R 4.3.0)
#>  crayon                 1.5.2     2022-09-29 [1] CRAN (R 4.3.0)
#>  data.table             1.14.8    2023-02-17 [1] CRAN (R 4.3.0)
#>  DBI                    1.1.3     2022-06-18 [1] CRAN (R 4.3.0)
#>  DelayedArray           0.26.7    2023-07-30 [1] Bioconductor
#>  DelayedMatrixStats     1.21.0    2022-12-20 [1] Bioconductor
#>  devtools               2.4.5     2022-10-11 [1] CRAN (R 4.3.0)
#>  dichromat              2.0-0.1   2022-05-02 [1] CRAN (R 4.3.0)
#>  digest                 0.6.33    2023-07-07 [1] CRAN (R 4.3.0)
#>  dplyr                * 1.1.2     2023-04-20 [1] CRAN (R 4.3.0)
#>  dqrng                  0.3.0     2021-05-01 [1] CRAN (R 4.3.0)
#>  edgeR                  3.42.2    2023-05-02 [1] Bioconductor
#>  ellipsis               0.3.2     2021-04-29 [1] CRAN (R 4.3.0)
#>  evaluate               0.21      2023-05-05 [1] CRAN (R 4.3.0)
#>  fansi                  1.0.4     2023-01-22 [1] CRAN (R 4.3.0)
#>  farver                 2.1.1     2022-07-06 [1] CRAN (R 4.3.0)
#>  fastmap                1.1.1     2023-02-24 [1] CRAN (R 4.3.0)
#>  forcats              * 1.0.0     2023-01-29 [1] CRAN (R 4.3.0)
#>  fs                     1.6.3     2023-07-20 [1] CRAN (R 4.3.0)
#>  generics               0.1.3     2022-07-05 [1] CRAN (R 4.3.0)
#>  GenomeInfoDb           1.36.1    2023-07-02 [1] Bioconductor
#>  GenomeInfoDbData       1.2.10    2023-05-08 [1] Bioconductor
#>  GenomicRanges          1.52.0    2023-04-25 [1] Bioconductor
#>  ggbeeswarm             0.7.2     2023-04-29 [1] CRAN (R 4.3.0)
#>  ggplot2              * 3.4.2     2023-04-03 [1] CRAN (R 4.3.0)
#>  glue                   1.6.2     2022-02-24 [1] CRAN (R 4.3.0)
#>  gtable                 0.3.3     2023-03-21 [1] CRAN (R 4.3.0)
#>  highr                  0.10      2022-12-22 [1] CRAN (R 4.3.0)
#>  hms                    1.1.3     2023-03-21 [1] CRAN (R 4.3.0)
#>  htmltools              0.5.7     2023-11-03 [1] CRAN (R 4.3.1)
#>  htmlwidgets            1.6.2     2023-03-17 [1] CRAN (R 4.3.0)
#>  httpuv                 1.6.11    2023-05-11 [1] CRAN (R 4.3.0)
#>  httr                   1.4.6     2023-05-08 [1] CRAN (R 4.3.0)
#>  igraph                 1.4.3     2023-05-22 [1] CRAN (R 4.3.0)
#>  IRanges                2.34.1    2023-07-02 [1] Bioconductor
#>  irlba                  2.3.5.1   2022-10-03 [1] CRAN (R 4.3.0)
#>  jquerylib              0.1.4     2021-04-26 [1] CRAN (R 4.3.0)
#>  jsonlite               1.8.7     2023-06-29 [1] CRAN (R 4.3.0)
#>  KEGGREST               1.39.0    2022-12-20 [1] Bioconductor
#>  knitr                  1.43      2023-05-25 [1] CRAN (R 4.3.0)
#>  labeling               0.4.2     2020-10-20 [1] CRAN (R 4.3.0)
#>  later                  1.3.1     2023-05-02 [1] CRAN (R 4.3.0)
#>  lattice                0.21-8    2023-04-05 [1] CRAN (R 4.3.0)
#>  lifecycle              1.0.3     2022-10-07 [1] CRAN (R 4.3.0)
#>  limma                  3.56.1    2023-05-07 [1] Bioconductor
#>  locfit                 1.5-9.7   2023-01-02 [1] CRAN (R 4.3.0)
#>  lubridate            * 1.9.2     2023-02-10 [1] CRAN (R 4.3.0)
#>  magrittr               2.0.3     2022-03-30 [1] CRAN (R 4.3.0)
#>  mapproj                1.2.11    2023-01-12 [1] CRAN (R 4.3.0)
#>  maps                   3.4.1     2022-10-30 [1] CRAN (R 4.3.0)
#>  Matrix                 1.6-5     2024-01-11 [1] CRAN (R 4.3.1)
#>  MatrixGenerics         1.12.3    2023-07-30 [1] Bioconductor
#>  matrixStats            1.0.0     2023-06-02 [1] CRAN (R 4.3.0)
#>  memoise                2.0.1     2021-11-26 [1] CRAN (R 4.3.0)
#>  metamoRph              0.2.2     2023-12-06 [1] local
#>  metapod                1.7.0     2022-12-20 [1] Bioconductor
#>  mime                   0.12      2021-09-28 [1] CRAN (R 4.3.0)
#>  miniUI                 0.1.1.1   2018-05-18 [1] CRAN (R 4.3.0)
#>  munsell                0.5.0     2018-06-12 [1] CRAN (R 4.3.0)
#>  org.Hs.eg.db           3.17.0    2023-08-23 [1] Bioconductor
#>  pals                   1.8       2023-08-23 [1] CRAN (R 4.3.0)
#>  pillar                 1.9.0     2023-03-22 [1] CRAN (R 4.3.0)
#>  pkgbuild               1.4.0     2022-11-27 [1] CRAN (R 4.3.0)
#>  pkgconfig              2.0.3     2019-09-22 [1] CRAN (R 4.3.0)
#>  pkgload                1.3.3     2023-09-22 [1] CRAN (R 4.3.1)
#>  png                    0.1-8     2022-11-29 [1] CRAN (R 4.3.0)
#>  prettyunits            1.1.1     2020-01-24 [1] CRAN (R 4.3.0)
#>  processx               3.8.3     2023-12-10 [1] CRAN (R 4.3.1)
#>  profvis                0.3.8     2023-05-02 [1] CRAN (R 4.3.0)
#>  promises               1.2.0.1   2021-02-11 [1] CRAN (R 4.3.0)
#>  ps                     1.7.5     2023-04-18 [1] CRAN (R 4.3.0)
#>  purrr                * 1.0.1     2023-01-10 [1] CRAN (R 4.3.0)
#>  R.methodsS3            1.8.2     2022-06-13 [1] CRAN (R 4.3.0)
#>  R.oo                   1.25.0    2022-06-12 [1] CRAN (R 4.3.0)
#>  R.utils                2.12.2    2022-11-11 [1] CRAN (R 4.3.0)
#>  R6                     2.5.1     2021-08-19 [1] CRAN (R 4.3.0)
#>  Rcpp                   1.0.11    2023-07-06 [1] CRAN (R 4.3.0)
#>  RCurl                  1.98-1.12 2023-03-27 [1] CRAN (R 4.3.0)
#>  readr                * 2.1.4     2023-02-10 [1] CRAN (R 4.3.0)
#>  remotes                2.4.2     2021-11-30 [1] CRAN (R 4.3.0)
#>  rlang                  1.1.1     2023-04-28 [1] CRAN (R 4.3.0)
#>  rmarkdown              2.23      2023-07-01 [1] CRAN (R 4.3.0)
#>  RSQLite                2.3.1     2023-04-03 [1] CRAN (R 4.3.0)
#>  rstudioapi             0.14      2022-08-22 [1] CRAN (R 4.3.0)
#>  rsvd                   1.0.5     2021-04-16 [1] CRAN (R 4.3.0)
#>  S4Arrays               1.2.0     2023-10-26 [1] Bioconductor
#>  S4Vectors              0.38.1    2023-05-02 [1] Bioconductor
#>  sass                   0.4.7     2023-07-15 [1] CRAN (R 4.3.0)
#>  ScaledMatrix           1.8.1     2023-05-03 [1] Bioconductor
#>  scales                 1.2.1     2022-08-20 [1] CRAN (R 4.3.0)
#>  scran                  1.27.1    2022-12-20 [1] Bioconductor
#>  scuttle                1.9.4     2023-01-23 [1] Bioconductor
#>  sessioninfo            1.2.2     2021-12-06 [1] CRAN (R 4.3.0)
#>  shiny                  1.8.0     2023-11-17 [1] CRAN (R 4.3.1)
#>  SingleCellExperiment   1.22.0    2023-04-25 [1] Bioconductor
#>  sparseMatrixStats      1.11.1    2022-12-30 [1] Bioconductor
#>  statmod                1.5.0     2023-01-06 [1] CRAN (R 4.3.0)
#>  stringi                1.7.12    2023-01-11 [1] CRAN (R 4.3.0)
#>  stringr              * 1.5.0     2022-12-02 [1] CRAN (R 4.3.0)
#>  SummarizedExperiment   1.30.2    2023-06-11 [1] Bioconductor
#>  tibble               * 3.2.1     2023-03-20 [1] CRAN (R 4.3.0)
#>  tidyr                * 1.3.0     2023-01-24 [1] CRAN (R 4.3.0)
#>  tidyselect             1.2.0     2022-10-10 [1] CRAN (R 4.3.0)
#>  tidyverse            * 2.0.0     2023-02-22 [1] CRAN (R 4.3.0)
#>  timechange             0.2.0     2023-01-11 [1] CRAN (R 4.3.0)
#>  tzdb                   0.4.0     2023-05-12 [1] CRAN (R 4.3.0)
#>  urlchecker             1.0.1     2021-11-30 [1] CRAN (R 4.3.0)
#>  usethis                2.1.6     2022-05-25 [1] CRAN (R 4.3.0)
#>  utf8                   1.2.3     2023-01-31 [1] CRAN (R 4.3.0)
#>  vctrs                  0.6.3     2023-06-14 [1] CRAN (R 4.3.0)
#>  vipor                  0.4.5     2017-03-22 [1] CRAN (R 4.3.0)
#>  viridisLite            0.4.2     2023-05-02 [1] CRAN (R 4.3.0)
#>  withr                  2.5.0     2022-03-03 [1] CRAN (R 4.3.0)
#>  xfun                   0.42      2024-02-08 [1] CRAN (R 4.3.1)
#>  xtable                 1.8-4     2019-04-21 [1] CRAN (R 4.3.0)
#>  XVector                0.40.0    2023-04-25 [1] Bioconductor
#>  yaml                   2.3.7     2023-01-23 [1] CRAN (R 4.3.0)
#>  zlibbioc               1.46.0    2023-04-25 [1] Bioconductor
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────